#!/usr/bin/Rscript
##########################################################################################
# Social Media and Policy Responses to the COVID-19 Pandemic in Switzerland
##########################################################################################
# Description:
##########################################################################################
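# Appendix Figures A1, A2, A3: plots of the 50 most frequent terms in the
# Twitter data, one plot per topic (App, Mask, COVID-19)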
##########################################################################################
# Contents
##########################################################################################
# 1) Dependencies
# 2) Preparations
# 3) Plots
##########################################################################################
# 1) Dependencies
##########################################################################################
library(tidyverse)
library(quanteda)
library(emojifont)
library(ggplot2)
library(readr)
##########################################################################################
# 2) Preparations
##########################################################################################
# NOTE(review): clearing the global environment is kept for backward
# compatibility. It wipes the caller's workspace when the script is sourced
# from an interactive session.
rm(list = ls())

# - set dir
# Locate this script so that the relative paths used further down
# ('ggplot_theme_ddl.R', '../data', '../images') resolve from the script's own
# directory. Rscript passes the script as a '--file=<path>' argument.
args <- commandArgs()
scriptName <- args[startsWith(args, '--file=')]

if (length(scriptName) == 0) {
  # No '--file=' argument: fall back to the file open in the RStudio editor.
  scriptName <- rstudioapi::getSourceEditorContext()$path
} else {
  # Keep only the first match and strip the 7-character '--file=' prefix.
  scriptName <- substr(scriptName[1], 8, nchar(scriptName[1]))
}

# dirname() handles both '/' and '\\' separators and returns "." for a bare
# file name, so `Rscript script.R` run from the script's directory also works.
pathName <- dirname(scriptName)

setwd(pathName)
parent_path <- getwd()

# Pull in the DigDemLab ggplot theme; messages and warnings raised while
# sourcing the theme file are silenced.
suppressMessages(suppressWarnings(source('ggplot_theme_ddl.R')))


# App-related tweets: read, then print a tabulation of the Language column.
dat_twitter_app <- readRDS("../data/Twitter_app.RDS")
table(dat_twitter_app$Language)

# Mask-related and COVID-19-related tweets.
dat_twitter_mask <- readRDS("../data/Twitter_mask.RDS")
dat_twitter_covid <- readRDS("../data/Twitter_covid.RDS")

# Tag each data set with its topic so the rows stay identifiable once stacked.
dat_twitter_app[["type"]] <- "App"
dat_twitter_mask[["type"]] <- "Mask"
dat_twitter_covid[["type"]] <- "COVID-19"

# Stack the three data sets row-wise into a single table.
dat_twitter <- dat_twitter_app %>%
  bind_rows(dat_twitter_mask, dat_twitter_covid)

# remove emojis

# Quick demonstration on a toy vector: characters in the Unicode categories
# So (other symbols) and Cn (unassigned) are deleted, ordinary words are kept.
xvect <- c('😂', 'für', '🍹', '😀', 'no', '😛')
gsub(pattern = '\\p{So}|\\p{Cn}', replacement = '', x = xvect, perl = TRUE)

# Lower-case the tweet text, then strip the same character classes from it.
dat_twitter$text_lower <- dat_twitter$Text %>%
  tolower() %>%
  gsub(pattern = '\\p{So}|\\p{Cn}', replacement = '', x = ., perl = TRUE)

# Build a quanteda corpus from the cleaned, lower-cased tweet text.
corp <- corpus(dat_twitter, text_field = "text_lower")

# create dfm
# Punctuation and numbers are dropped once, at the tokenisation step; dfm()
# then only counts the remaining tokens. Afterwards German stopwords, "dass"
# and "+" are removed, followed by any feature that still contains an emoji.
dfmat <- corp %>%
  #corpus_sample(size = 1000) %>%
  tokens(remove_punct = TRUE, remove_numbers = TRUE) %>%
  dfm() %>%
  dfm_remove(pattern = c(stopwords("de"), "dass", "+")) %>%
  dfm_remove(pattern = '\\p{So}|\\p{Cn}', valuetype = "regex") # remove emojis


# Frequency table of the 50 top-ranked features within each "type" group
tstat_freq <- dfmat %>%
  textstat_frequency(n = 50, groups = "type")

# create plot for each of the three groups
group <- unique(tstat_freq$group)

# Make sure the output directory exists so ggsave() does not fail on a fresh
# checkout. Nothing happens if it is already there.
dir.create("../images", showWarnings = FALSE, recursive = TRUE)

# Styling shared by every plot. It does not depend on the group, so it is
# built once and added to each plot as a list (elements are applied in order).
plot_style <- list(
  ddl_theme(panel.grid.major = element_blank(),
            panel.grid.minor = element_blank()),
  theme(legend.position = "bottom", legend.title = element_text(size = 16),
        strip.background = element_blank(), strip.text = element_text(color = "black"),
        axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0.0, size = 16, color = "black"),
        axis.text.y = element_text(hjust=0, size = 16, color = "black"),
        axis.title = element_text(size = 16, color = "black"),
        legend.text = element_text(size = 18, color = "black"),
        plot.title = element_blank(),
        strip.text.x = element_text(size = 18, color = "black"),
        plot.margin = unit(c(.6,.6,.6,.6), "cm"),
        axis.line.x = element_line(color="black", size = .5),
        axis.line.y = element_line(color="black", size = .5),
        legend.key.size = unit(2.5,"line")),
  guides(color = guide_legend(override.aes = list(size=3)))
)

for (topic in group) {
  # Rows of the frequency table that belong to the current group. Explicit
  # indexing avoids the ambiguity between the `group` column and the `group`
  # vector above that non-standard evaluation in subset() would introduce.
  tstat_subset <- tstat_freq[tstat_freq$group %in% topic, , drop = FALSE]

  # Horizontal lollipop chart: thin bars with a point at the end, terms
  # ordered by frequency.
  pl <- ggplot(tstat_subset, aes(x = reorder(feature, frequency), y = frequency)) +
    geom_bar(stat = "identity", width = 0.2) +
    geom_point(size = 3) +
    coord_flip() +
    labs(x = NULL, y = "Most frequent terms") +
    plot_style

  # One PDF per group, named after the lower-cased group label.
  ggsave(paste0("../images/frequency_", str_to_lower(topic), ".pdf"), plot = pl,
         device = cairo_pdf, width = 12, height = 18)

}
